********************************************************************************
********************************************************************************
****    .do: problem_with_crim_records_criminology.do
****    
****    Title: Problem with Criminal Records
****    Journal: Criminology
****    
****    Written on 2023/09/11
****     
****    Written by Robert Stewart (robstew@umd.edu), University of Maryland
********************************************************************************
clear
frames reset
set more off
drop _all

net install unique, force // in case unique is not yet installed

cd "G:\Shared drives\Criminal Accuracy Shared Drive\Criminology\final"

global WORKING_FOLDER `c(pwd)' // enter working folder here 

use "$WORKING_FOLDER/CRIM_temp_dataset.dta", clear

frame rename default main

* Suffixes:
*	'_sr' = State Report
*	'_bc' = Background Check Company
*	'_da' = Data Aggregator

***************** TABLE 2 - Charges and Dispositions by Source
* All Charges
foreach x in "charges" "convs" "dism" "unk" {
    eststo: estpost tabstat `x'_sr `x'_bc `x'_da, stat(sum) c(v)
}

* Charges by participant
// Participants with charges
unique pid if source_sr == 1 // 101
unique pid if source_bc == 1
unique pid if source_da == 1
// Participant Dispositions
// - 'cd_': Charge disposition
//          (1 = Convicted, 
//           6 = Acquitted/dismissed, 
//           9 = Unknown)

// Convicted
unique pid if cd_sr == 1
unique pid if cd_bc == 1
unique pid if cd_da == 1
// Acquitted/dismissed
unique pid if cd_sr == 6
unique pid if cd_bc == 6
unique pid if cd_da == 6
// Unknown
unique pid if cd_sr == 9
unique pid if cd_bc == 9
unique pid if cd_da == 9


***************** FIGURE 1
** Create frame for record comparison graphic
frame put pid source_sr cd_sr source_bc cd_bc source_da cd_da first_row combined, into(nj_all_counts)
*frame nj_all_counts: export delimited using "$WORKING_FOLDER/pwcr_fig_1_data.csv", replace
// [Contact Robert Stewart (robstew@umd.edu) for python code to create Figure 1]

***************** TABLE 2 - False Positive and False Negative Rates
frame change nj_all_counts
*****************
* CHARGE LEVEL
** False Negatives (reported by the SR but not the private sector)
// Calculate for BC
qui count if source_bc == 0 & source_sr == 1
qui local FN_BC = `r(N)'
qui count if source_sr == 1
qui local TOT_SR = `r(N)'
local FN_BC_p = `FN_BC' / `TOT_SR'

// Calculate for DA
qui count if source_da == 0 & source_sr == 1
qui local FN_DA = `r(N)'
qui count if source_sr == 1
qui local TOT_SR = `r(N)'
local FN_DA_p = `FN_DA' / `TOT_SR'

// Calculate for Either
qui count if (source_bc == 0 & source_da == 0) & source_sr == 1
qui local FN_EITHER = `r(N)'
qui count if source_sr == 1
qui local TOT_SR = `r(N)'
local FN_EITHER_p = `FN_EITHER' / `TOT_SR'

// Create a matrix to hold the values
//matrix A = (`TOT_SR', `FN_BC', `FN_BC_p' \ `TOT_SR', `FN_DA', `FN_DA_p' \ `TOT_SR', `FN_EITHER', `FN_EITHER_p')

matrix A = (`TOT_SR', `TOT_SR', `TOT_SR' \ `FN_BC', `FN_DA', `FN_EITHER' \ `FN_BC_p', `FN_DA_p' , `FN_EITHER_p')

// Label the rows and columns
matrix colnames A = "BC" "DA" "COMBINED"
matrix rownames A = "Total" "False Neg (n)" "False Neg (%)"

// Display the matrix
matlist A, twidth(20)


** False Negatives (reported by the private sector but not the SR)
qui count if source_bc == 0 & source_sr == 1
qui local FP_BC = `r(N)'
qui count if source_bc == 1
qui local TOT_BC = `r(N)'
local FP_BC_p = `FP_BC' / `TOT_BC'

qui count if source_da == 0 & source_sr == 1
qui local FP_DA = `r(N)'
qui count if source_da == 1
qui local TOT_DA = `r(N)'
local FP_DA_p = `FP_DA' / `TOT_DA'

qui count if (source_bc == 1 | source_da == 1) & source_sr == 0
qui local FP_EITHER = `r(N)'
qui count if source_bc == 1 | source_da == 1
qui local TOT_EITHER = `r(N)'
local FP_NEITHER_p = `FP_EITHER' / `TOT_EITHER'

// Create a matrix to hold the values
//matrix B = (`TOT_BC', `FP_BC', `FP_BC_p' \ `TOT_DA', `FP_DA', `FP_DA_p' \ `TOT_EITHER', `FP_EITHER', `FP_NEITHER_p')

matrix B =(`TOT_BC', `TOT_DA', `TOT_EITHER' \ `FP_BC', `FP_DA', `FP_EITHER' \ `FP_BC_p', `FP_DA_p', `FP_NEITHER_p')
// Label the rows and columns
matrix colnames B = "BC" "DA" "COMBINED"
matrix rownames B = "Total" "False Neg (n)" "False Neg (%)"

// Display the matrix
matlist B, twidth(20)



* PARTICIPANT LEVEL
* False negatives
unique pid if source_sr == 1 // # state report records (denominator)
unique pid if source_bc == 0 & source_sr == 1 // # BCC/Participants false negatives
unique pid if source_da == 0 & source_sr == 1 // # PSS/Particiapnts false negatives
unique pid if (source_bc == 0 | source_da == 0) & source_sr == 1 // # 3rd party/participants false negatives

* False positives
unique pid if source_bc == 1 & source_sr == 0 // # of BCC/participants false positives
unique pid if source_da == 1 & source_sr == 0 // # of PSS/participants false positives
unique pid if (source_bc == 1 | source_da == 1) & source_sr == 0 // # 3rd party/participants false positives

* Any Error
// BCC
unique pid if ///
    (source_bc == 1 & source_sr == 0 | ///
     source_bc == 0 & source_sr == 1) // # of BCC/participants any error
// PSS
unique pid if ///
    (source_da == 1 & source_sr == 0 | ///
     source_da == 0 & source_sr == 1) // # of PSS/participants any error
// Combined
unique pid if ///
    ((source_bc == 1 | source_da == 1) & source_sr == 0 | ///
    ((source_bc == 0 | source_da == 0) & source_sr == 1 )) // # 3rd party/participants any error


****************** Table 3 - Frequency of Offense Type Orphans
frame change main
* Create source_bc_da = orphans from each 3rd party
gen source_bc_da = ///
    cond(source_bc == 1 & source_sr == 0, 1, ///
    cond(source_da == 1 & source_sr == 0, 2, .))
table bjs_description source_bc_da



****************** Proportion of each person's record that are orphans
// Proportion of each participants' records that are orphans
tabstat prop_orphan if first_row == 1, stat(mean median min max sd)

****************** Mismatched dispositions
/* 
"In cases where the charges in the State Report matched the charges
 in the regulated Background Check report (n=651), 34% of the case 
 dispositions did not match (e.g., the State Report listed the charge 
 as dismissed while the Background Check reported the charge as 
 resulting in a conviction). When both the State Report and the 
 unregulated Data Aggregator reported matching criminal charges 
 (n=258), 56% had conflicting case dispositions."
*/
gen diff_bc = cond(cd_sr != cd_bc & cd_sr != . & cd_bc != ., 1, 0 )
gen diff_da = cond(cd_sr != cd_da & cd_sr != . & cd_da != ., 1, 0 )
tab diff_bc if cd_sr != . & cd_bc != . // Only of those that are linked
tab diff_da if cd_sr != . & cd_da != . // Only of those that are linked

